Scoring gene set activity

loading packages and data

data <- Read10X(data.dir = "data/pbmc3k_filtered_gene_bc_matrices/hg19")
object <- CreateSeuratObject(counts = data, min.cells = 3, min.features = 200)
object
An object of class Seurat 
13714 features across 2700 samples within 1 assay 
Active assay: RNA (13714 features, 0 variable features)
 1 layer present: counts

Gene set activity

Data pre-processing

ann = read.table(file = 'data/pbmc3k_seurat_annotation.txt', row.names = 1, 
                 header = TRUE, sep = '\t')
head(ann)
                 seurat_annotations
AAACATACAACCAC-1       Memory_CD4_T
AAACATTGAGCTAC-1                  B
AAACATTGATCAGC-1       Memory_CD4_T
AAACCGTGCTTCCG-1         CD14+_Mono
AAACCGTGTATGCG-1                 NK
AAACGCACTGGTAC-1       Memory_CD4_T
gs <- readLines('data/B_cell_activation.txt')
grate <- getGeneRate(background.geneset = 'data/Tres.kegg', signature.geneset = gs, mode = "single")
head(grate)
       background signature1
ACSS2 0.016129032          0
GCK   0.037634409          0
PGK2  0.005376344          0
PGK1  0.005376344          0
PDHB  0.026881720          0
PDHA1 0.026881720          0
object <- rebuildMatrix(object, features = NULL, assay = "RNA", slot = "data", method = 'pca', dim = 20, avc = NULL)
object <- rebuildMatrix(object, features = NULL, assay = "RNA", slot = "data", method = 'nmf', dim = 20, avc = NULL)

iter |      tol 
---------------
   1 | 8.81e-01
   2 | 7.40e-02
   3 | 2.36e-02
   4 | 1.00e-02
   5 | 5.46e-03
   6 | 3.53e-03
   7 | 2.56e-03
   8 | 2.00e-03
   9 | 1.64e-03
  10 | 1.38e-03
  11 | 1.18e-03
  12 | 1.02e-03
  13 | 8.73e-04
  14 | 7.51e-04
  15 | 6.50e-04
  16 | 5.65e-04
  17 | 4.97e-04
  18 | 4.40e-04
  19 | 3.94e-04
  20 | 3.54e-04
  21 | 3.19e-04
  22 | 2.87e-04
  23 | 2.60e-04
  24 | 2.39e-04
  25 | 2.22e-04
  26 | 2.08e-04
  27 | 1.94e-04
  28 | 1.81e-04
  29 | 1.69e-04
  30 | 1.59e-04
  31 | 1.49e-04
  32 | 1.39e-04
  33 | 1.32e-04
  34 | 1.25e-04
  35 | 1.19e-04
  36 | 1.13e-04
  37 | 1.06e-04
  38 | 1.01e-04
  39 | 9.67e-05
  40 | 9.23e-05
  41 | 8.82e-05
  42 | 8.46e-05
  43 | 8.15e-05
  44 | 7.86e-05
  45 | 7.57e-05
  46 | 7.32e-05
  47 | 7.09e-05
  48 | 6.85e-05
  49 | 6.59e-05
  50 | 6.31e-05
  51 | 6.02e-05
  52 | 5.73e-05
  53 | 5.47e-05
  54 | 5.21e-05
  55 | 4.95e-05
  56 | 4.71e-05
  57 | 4.49e-05
  58 | 4.29e-05
  59 | 4.10e-05
  60 | 3.95e-05
  61 | 3.80e-05
  62 | 3.66e-05
  63 | 3.54e-05
  64 | 3.42e-05
  65 | 3.31e-05
  66 | 3.22e-05
  67 | 3.13e-05
  68 | 3.05e-05
  69 | 2.97e-05
  70 | 2.87e-05
  71 | 2.79e-05
  72 | 2.71e-05
  73 | 2.63e-05
  74 | 2.56e-05
  75 | 2.50e-05
  76 | 2.45e-05
  77 | 2.39e-05
  78 | 2.34e-05
  79 | 2.30e-05
  80 | 2.26e-05
  81 | 2.23e-05
  82 | 2.20e-05
  83 | 2.18e-05
  84 | 2.16e-05
  85 | 2.14e-05
  86 | 2.12e-05
  87 | 2.10e-05
  88 | 2.07e-05
  89 | 2.05e-05
  90 | 2.03e-05
  91 | 2.01e-05
  92 | 1.99e-05
  93 | 1.96e-05
  94 | 1.92e-05
  95 | 1.89e-05
  96 | 1.85e-05
  97 | 1.82e-05
  98 | 1.79e-05
  99 | 1.77e-05
 100 | 1.73e-05
 101 | 1.70e-05
 102 | 1.67e-05
 103 | 1.65e-05
 104 | 1.63e-05
 105 | 1.61e-05
 106 | 1.60e-05
 107 | 1.57e-05
 108 | 1.54e-05
 109 | 1.51e-05
 110 | 1.49e-05
 111 | 1.47e-05
 112 | 1.45e-05
 113 | 1.44e-05
 114 | 1.44e-05
 115 | 1.43e-05
 116 | 1.43e-05
 117 | 1.43e-05
 118 | 1.43e-05
 119 | 1.42e-05
 120 | 1.41e-05
 121 | 1.41e-05
 122 | 1.40e-05
 123 | 1.39e-05
 124 | 1.38e-05
 125 | 1.37e-05
 126 | 1.36e-05
 127 | 1.35e-05
 128 | 1.34e-05
 129 | 1.34e-05
 130 | 1.34e-05
 131 | 1.33e-05
 132 | 1.32e-05
 133 | 1.31e-05
 134 | 1.31e-05
 135 | 1.29e-05
 136 | 1.28e-05
 137 | 1.27e-05
 138 | 1.26e-05
 139 | 1.24e-05
 140 | 1.23e-05
 141 | 1.22e-05
 142 | 1.21e-05
 143 | 1.19e-05
 144 | 1.17e-05
 145 | 1.14e-05
 146 | 1.10e-05
 147 | 1.07e-05
 148 | 1.04e-05
 149 | 1.01e-05
 150 | 9.79e-06
object <- rebuildMatrix(object, features = NULL, assay = "RNA", slot = "data", method = 'mca', dim = 20, avc = NULL)
1.114 sec elapsed
73.585 sec elapsed
3.303 sec elapsed

Let’s check prediction activity using AUC.

No dimension reduction

object@reductions[['active.method']] <- NULL
Bscore <- computeResponse(object, gene.rate = grate, celltype = NULL, signature = 'Bcell_activation')
label <- ann[rownames(Bscore), 'seurat_annotations']
label[label != 'B'] = 'others'
roc_empirical <- rocit(score = Bscore[,'Bcell_activation'], class = label, negref = "others")
title <- paste0('AUC:', strsplit(as.character(summary(roc_empirical)[4,]), ':')[[1]][2])
                            
 Method used: empirical     
 Number of positive(s): 344 
 Number of negative(s): 2294
 Area under curve: 0.8702   
plot(roc_empirical, YIndex = F, values = F, col = c(2,1))
text(0.1, 0.6, title)

Dimension reduction by NMF

object@reductions[['active.method']] <- 'nmf'
Bscore <- computeResponse(object, gene.rate = grate, celltype = NULL, signature = 'Bcell_activation')
roc_empirical <- rocit(score = Bscore[,'Bcell_activation'], class = label, negref = "others")
title <- paste0('AUC:', strsplit(as.character(summary(roc_empirical)[4,]), ':')[[1]][2])
                            
 Method used: empirical     
 Number of positive(s): 344 
 Number of negative(s): 2294
 Area under curve: 0.9713   
plot(roc_empirical, YIndex = F, values = F, col = c(2,1))
text(0.1, 0.6, title)

Dimension reduction by PCA

object@reductions[['active.method']] <- 'pca'
Bscore <- computeResponse(object, gene.rate = grate, celltype = NULL, signature = 'Bcell_activation')
roc_empirical <- rocit(score = Bscore[,'Bcell_activation'], class = label, negref = "others")
title <- paste0('AUC:', strsplit(as.character(summary(roc_empirical)[4,]), ':')[[1]][2])
                            
 Method used: empirical     
 Number of positive(s): 344 
 Number of negative(s): 2294
 Area under curve: 0.9999   
plot(roc_empirical, YIndex = F, values = F, col = c(2,1))
text(0.1, 0.6, title)

Dimension reduction by MCA

object@reductions[['active.method']] <- 'mca'
Bscore <- computeResponse(object, gene.rate = grate, celltype = NULL, signature = 'Bcell_activation')
roc_empirical <- rocit(score = Bscore[,'Bcell_activation'], class = label, negref = "others")
title <- paste0('AUC:', strsplit(as.character(summary(roc_empirical)[4,]), ':')[[1]][2])
                            
 Method used: empirical     
 Number of positive(s): 344 
 Number of negative(s): 2294
 Area under curve: 0.9999   
plot(roc_empirical, YIndex = F, values = F, col = c(2,1))
text(0.1, 0.6, title)

sessionInfo

R version 4.4.3 (2025-02-28)
Platform: x86_64-apple-darwin20
Running under: macOS Ventura 13.7.4

Matrix products: default
BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Asia/Shanghai
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods  
[7] base     

other attached packages:
 [1] dplyr_1.1.4        patchwork_1.3.0    ggplot2_3.5.1     
 [4] Matrix_1.7-3       ROCit_2.1.2        Seurat_5.2.1      
 [7] SeuratObject_5.0.2 sp_2.2-0           SaaSc_0.1.0       
[10] rmarkdown_2.29    

loaded via a namespace (and not attached):
  [1] RcppAnnoy_0.0.22            splines_4.4.3              
  [3] later_1.4.1                 R.oo_1.27.0                
  [5] tibble_3.2.1                polyclip_1.10-7            
  [7] fastDummies_1.7.5           lifecycle_1.0.4            
  [9] globals_0.16.3              lattice_0.22-6             
 [11] MASS_7.3-64                 magrittr_2.0.3             
 [13] plotly_4.10.4               sass_0.4.9                 
 [15] jquerylib_0.1.4             yaml_2.3.10                
 [17] httpuv_1.6.15               sctransform_0.4.1          
 [19] askpass_1.2.1               spam_2.11-1                
 [21] spatstat.sparse_3.1-0       reticulate_1.41.0.1        
 [23] cowplot_1.1.3               pbapply_1.7-2              
 [25] RColorBrewer_1.1-3          abind_1.4-8                
 [27] zlibbioc_1.52.0             Rtsne_0.17                 
 [29] GenomicRanges_1.58.0        R.utils_2.13.0             
 [31] purrr_1.0.4                 downlit_0.4.4              
 [33] BiocGenerics_0.52.0         GenomeInfoDbData_1.2.13    
 [35] IRanges_2.40.1              S4Vectors_0.44.0           
 [37] ggrepel_0.9.6               irlba_2.3.5.1              
 [39] listenv_0.9.1               spatstat.utils_3.1-2       
 [41] umap_0.2.10.0               goftest_1.2-3              
 [43] RSpectra_0.16-2             spatstat.random_3.3-2      
 [45] fitdistrplus_1.2-2          parallelly_1.42.0          
 [47] codetools_0.2-20            DelayedArray_0.32.0        
 [49] scuttle_1.16.0              tidyselect_1.2.1           
 [51] UCSC.utils_1.2.0            distill_1.6                
 [53] farver_2.1.2                viridis_0.6.5              
 [55] ScaledMatrix_1.14.0         matrixStats_1.5.0          
 [57] stats4_4.4.3                spatstat.explore_3.3-4     
 [59] jsonlite_1.9.1              BiocNeighbors_2.0.1        
 [61] progressr_0.15.1            ggridges_0.5.6             
 [63] survival_3.8-3              scater_1.34.1              
 [65] tictoc_1.2.1                tools_4.4.3                
 [67] ica_1.0-3                   Rcpp_1.0.14                
 [69] glue_1.8.0                  gridExtra_2.3              
 [71] SparseArray_1.6.2           mgcv_1.9-1                 
 [73] xfun_0.51                   MatrixGenerics_1.18.1      
 [75] GenomeInfoDb_1.42.3         withr_3.0.2                
 [77] fastmap_1.2.0               fansi_1.0.6                
 [79] openssl_2.3.2               RcppML_0.3.7               
 [81] digest_0.6.37               rsvd_1.0.5                 
 [83] R6_2.6.1                    mime_0.12                  
 [85] colorspace_2.1-1            scattermore_1.2            
 [87] tensor_1.5                  spatstat.data_3.1-4        
 [89] R.methodsS3_1.8.2           tidyr_1.3.1                
 [91] generics_0.1.3              data.table_1.17.0          
 [93] httr_1.4.7                  htmlwidgets_1.6.4          
 [95] S4Arrays_1.6.0              uwot_0.2.3                 
 [97] pkgconfig_2.0.3             gtable_0.3.6               
 [99] lmtest_0.9-40               SingleCellExperiment_1.28.1
[101] XVector_0.46.0              htmltools_0.5.8.1          
[103] dotCall64_1.2               bookdown_0.42              
[105] fgsea_1.32.0                scales_1.3.0               
[107] Biobase_2.66.0              png_0.1-8                  
[109] spatstat.univar_3.1-2       knitr_1.50                 
[111] rstudioapi_0.17.1           reshape2_1.4.4             
[113] nlme_3.1-167                cachem_1.1.0               
[115] zoo_1.8-13                  stringr_1.5.1              
[117] KernSmooth_2.23-26          vipor_0.4.7                
[119] parallel_4.4.3              miniUI_0.1.1.1             
[121] pillar_1.10.1               grid_4.4.3                 
[123] vctrs_0.6.5                 RANN_2.6.2                 
[125] promises_1.3.2              BiocSingular_1.22.0        
[127] beachmat_2.22.0             xtable_1.8-4               
[129] cluster_2.1.8               beeswarm_0.4.0             
[131] CelliD_1.14.0               evaluate_1.0.3             
[133] cli_3.6.4                   compiler_4.4.3             
[135] rlang_1.1.5                 crayon_1.5.3               
[137] future.apply_1.11.3         labeling_0.4.3             
[139] RcppArmadillo_14.2.3-1      plyr_1.8.9                 
[141] ggbeeswarm_0.7.2            stringi_1.8.4              
[143] viridisLite_0.4.2           deldir_2.0-4               
[145] BiocParallel_1.40.0         munsell_0.5.1              
[147] lazyeval_0.2.2              spatstat.geom_3.3-5        
[149] RcppHNSW_0.6.0              future_1.34.0              
[151] shiny_1.10.0                SummarizedExperiment_1.36.0
[153] ROCR_1.0-11                 fontawesome_0.5.3          
[155] igraph_2.1.4                memoise_2.0.1              
[157] bslib_0.9.0                 fastmatch_1.1-6